# Import the libraries we use to manipulate arrays and dataframes
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# accuracy score check how our model is performing
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
import hvplot.pandas
import seaborn as sns
from scipy import stats
!pip install channels
Requirement already satisfied: channels in c:\users\itshop\anaconda3\lib\site-packages (3.0.4) Requirement already satisfied: Django>=2.2 in c:\users\itshop\anaconda3\lib\site-packages (from channels) (3.2.5) Requirement already satisfied: daphne<4,>=3.0 in c:\users\itshop\anaconda3\lib\site-packages (from channels) (3.0.2) Requirement already satisfied: asgiref<4,>=3.3.1 in c:\users\itshop\anaconda3\lib\site-packages (from channels) (3.4.1) Requirement already satisfied: autobahn>=0.18 in c:\users\itshop\anaconda3\lib\site-packages (from daphne<4,>=3.0->channels) (21.11.1) Requirement already satisfied: twisted[tls]>=18.7 in c:\users\itshop\anaconda3\lib\site-packages (from daphne<4,>=3.0->channels) (21.7.0) Note: you may need to restart the kernel to use updated packages. Requirement already satisfied: cryptography>=3.4.6 in c:\users\itshop\anaconda3\lib\site-packages (from autobahn>=0.18->daphne<4,>=3.0->channels) (3.4.8) Requirement already satisfied: hyperlink>=21.0.0 in c:\users\itshop\anaconda3\lib\site-packages (from autobahn>=0.18->daphne<4,>=3.0->channels) (21.0.0) Requirement already satisfied: txaio>=21.2.1 in c:\users\itshop\anaconda3\lib\site-packages (from autobahn>=0.18->daphne<4,>=3.0->channels) (21.2.1) Requirement already satisfied: setuptools in c:\users\itshop\anaconda3\lib\site-packages (from autobahn>=0.18->daphne<4,>=3.0->channels) (58.0.4) Requirement already satisfied: cffi>=1.12 in c:\users\itshop\anaconda3\lib\site-packages (from cryptography>=3.4.6->autobahn>=0.18->daphne<4,>=3.0->channels) (1.14.6) Requirement already satisfied: pycparser in c:\users\itshop\anaconda3\lib\site-packages (from cffi>=1.12->cryptography>=3.4.6->autobahn>=0.18->daphne<4,>=3.0->channels) (2.20) Requirement already satisfied: sqlparse>=0.2.2 in c:\users\itshop\anaconda3\lib\site-packages (from Django>=2.2->channels) (0.4.1) Requirement already satisfied: pytz in c:\users\itshop\anaconda3\lib\site-packages (from Django>=2.2->channels) (2021.3)
!pip install xgboost
Collecting xgboost Note: you may need to restart the kernel to use updated packages. Downloading xgboost-1.5.1-py3-none-win_amd64.whl (106.6 MB) Requirement already satisfied: numpy in c:\users\itshop\anaconda3\lib\site-packages (from xgboost) (1.20.3) Requirement already satisfied: scipy in c:\users\itshop\anaconda3\lib\site-packages (from xgboost) (1.7.1) Installing collected packages: xgboost Successfully installed xgboost-1.5.1
!pip install -q hvplot
Requirement already satisfied: zope.interface>=4.4.2 in c:\users\itshop\anaconda3\lib\site-packages (from twisted[tls]>=18.7->daphne<4,>=3.0->channels) (5.4.0) Requirement already satisfied: twisted-iocpsupport~=1.0.0 in c:\users\itshop\anaconda3\lib\site-packages (from twisted[tls]>=18.7->daphne<4,>=3.0->channels) (1.0.2) Requirement already satisfied: attrs>=19.2.0 in c:\users\itshop\anaconda3\lib\site-packages (from twisted[tls]>=18.7->daphne<4,>=3.0->channels) (21.2.0) Requirement already satisfied: incremental>=21.3.0 in c:\users\itshop\anaconda3\lib\site-packages (from twisted[tls]>=18.7->daphne<4,>=3.0->channels) (21.3.0) Requirement already satisfied: constantly>=15.1 in c:\users\itshop\anaconda3\lib\site-packages (from twisted[tls]>=18.7->daphne<4,>=3.0->channels) (15.1.0) Requirement already satisfied: pyopenssl>=16.0.0 in c:\users\itshop\anaconda3\lib\site-packages (from twisted[tls]>=18.7->daphne<4,>=3.0->channels) (21.0.0) Requirement already satisfied: service-identity>=18.1.0 in c:\users\itshop\anaconda3\lib\site-packages (from twisted[tls]>=18.7->daphne<4,>=3.0->channels) (21.1.0) Requirement already satisfied: six in c:\users\itshop\anaconda3\lib\site-packages (from Automat>=0.8.0->twisted[tls]>=18.7->daphne<4,>=3.0->channels) (1.16.0) Requirement already satisfied: pyasn1-modules in c:\users\itshop\anaconda3\lib\site-packages (from service-identity>=18.1.0->twisted[tls]>=18.7->daphne<4,>=3.0->channels) (0.2.8) Requirement already satisfied: pyasn1 in c:\users\itshop\anaconda3\lib\site-packages (from service-identity>=18.1.0->twisted[tls]>=18.7->daphne<4,>=3.0->channels) (0.4.8)
# Load the heart-patient dataset from the CSV file into a DataFrame.
heart_data=pd.read_csv('heart.csv')
# Show the first five rows to confirm the data loaded correctly.
heart_data.head()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2.3 | 0 | 0 | 1 | 1 |
| 1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3.5 | 0 | 0 | 2 | 1 |
| 2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1.4 | 2 | 0 | 2 | 1 |
| 3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0.8 | 2 | 0 | 2 | 1 |
| 4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0.6 | 2 | 0 | 2 | 1 |
# Show the last five rows; the index confirms the dataset runs 0..302 (303 rows).
heart_data.tail()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 298 | 57 | 0 | 0 | 140 | 241 | 0 | 1 | 123 | 1 | 0.2 | 1 | 0 | 3 | 0 |
| 299 | 45 | 1 | 3 | 110 | 264 | 0 | 1 | 132 | 0 | 1.2 | 1 | 0 | 3 | 0 |
| 300 | 68 | 1 | 0 | 144 | 193 | 1 | 1 | 141 | 0 | 3.4 | 1 | 2 | 3 | 0 |
| 301 | 57 | 1 | 0 | 130 | 131 | 0 | 1 | 115 | 1 | 1.2 | 1 | 1 | 3 | 0 |
| 302 | 57 | 0 | 1 | 130 | 236 | 0 | 0 | 174 | 0 | 0.0 | 1 | 1 | 2 | 0 |
# Shape of the dataset as (rows, columns).
heart_data.shape
(303, 14)
# Column dtypes, non-null counts and memory usage for the whole DataFrame.
heart_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 303 entries, 0 to 302 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 age 303 non-null int64 1 sex 303 non-null int64 2 cp 303 non-null int64 3 trestbps 303 non-null int64 4 chol 303 non-null int64 5 fbs 303 non-null int64 6 restecg 303 non-null int64 7 thalach 303 non-null int64 8 exang 303 non-null int64 9 oldpeak 303 non-null float64 10 slope 303 non-null int64 11 ca 303 non-null int64 12 thal 303 non-null int64 13 target 303 non-null int64 dtypes: float64(1), int64(13) memory usage: 33.3 KB
# Count missing values per column (all zeros here, so no imputation is needed).
heart_data.isnull().sum()
age 0 sex 0 cp 0 trestbps 0 chol 0 fbs 0 restecg 0 thalach 0 exang 0 oldpeak 0 slope 0 ca 0 thal 0 target 0 dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
# For example, the 25% quartile shows a quarter of the patients are younger
# than 47.5 years.
heart_data.describe()
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | target | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
| mean | 54.366337 | 0.683168 | 0.966997 | 131.623762 | 246.264026 | 0.148515 | 0.528053 | 149.646865 | 0.326733 | 1.039604 | 1.399340 | 0.729373 | 2.313531 | 0.544554 |
| std | 9.082101 | 0.466011 | 1.032052 | 17.538143 | 51.830751 | 0.356198 | 0.525860 | 22.905161 | 0.469794 | 1.161075 | 0.616226 | 1.022606 | 0.612277 | 0.498835 |
| min | 29.000000 | 0.000000 | 0.000000 | 94.000000 | 126.000000 | 0.000000 | 0.000000 | 71.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 47.500000 | 0.000000 | 0.000000 | 120.000000 | 211.000000 | 0.000000 | 0.000000 | 133.500000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 2.000000 | 0.000000 |
| 50% | 55.000000 | 1.000000 | 1.000000 | 130.000000 | 240.000000 | 0.000000 | 1.000000 | 153.000000 | 0.000000 | 0.800000 | 1.000000 | 0.000000 | 2.000000 | 1.000000 |
| 75% | 61.000000 | 1.000000 | 2.000000 | 140.000000 | 274.500000 | 0.000000 | 1.000000 | 166.000000 | 1.000000 | 1.600000 | 2.000000 | 1.000000 | 3.000000 | 1.000000 |
| max | 77.000000 | 1.000000 | 3.000000 | 200.000000 | 564.000000 | 1.000000 | 2.000000 | 202.000000 | 1.000000 | 6.200000 | 2.000000 | 4.000000 | 3.000000 | 1.000000 |
# Distribution of the target column: 1 = heart disease, 0 = healthy.
heart_data['target'].value_counts()
1 165 0 138 Name: target, dtype: int64
# 165 patients have heart disease and 138 are healthy; the classes are
# reasonably balanced, so no resampling/rebalancing of the data is needed.
heart_data.target.value_counts().hvplot.bar(
title="Heart Disease Patients", xlabel='Heart Disease', ylabel='The numbers of the patients found',
width=500, height=350
)
# Overlay bar charts of the sex distribution for diseased (target == 1)
# versus healthy (target == 0) patients.
Non_healthy_patient = (
    heart_data.loc[heart_data['target'] == 1, 'sex']
    .value_counts()
    .hvplot.bar(alpha=0.4)
)
healthy_patient = (
    heart_data.loc[heart_data['target'] == 0, 'sex']
    .value_counts()
    .hvplot.bar(alpha=0.4)
)
sex_overlay = Non_healthy_patient * healthy_patient
sex_overlay.opts(
    title='relation between heart disease and sex', xlabel='sex',
    ylabel='numbers', width=450, height=450, legend_cols=2)
# Overlay bar charts of chest-pain type (cp) counts for diseased versus
# healthy patients.
Non_healthy_patient_chest_pain = (
    heart_data.loc[heart_data['target'] == 1, 'cp']
    .value_counts()
    .hvplot.bar(alpha=0.4)
)
healthy_patient_chest_pain = (
    heart_data.loc[heart_data['target'] == 0, 'cp']
    .value_counts()
    .hvplot.bar(alpha=0.4)
)
cp_overlay = Non_healthy_patient_chest_pain * healthy_patient_chest_pain
cp_overlay.opts(
    title='relation between heart disease and chest pain', xlabel='chest pain',
    ylabel='numbers', width=450, height=450, legend_cols=2)
# Overlay bar charts of fasting-blood-sugar (fbs) counts for diseased versus
# healthy patients.
Non_healthy_patient_fasting_blood_sugar = (
    heart_data.loc[heart_data['target'] == 1, 'fbs']
    .value_counts()
    .hvplot.bar(alpha=0.4)
)
healthy_patient_fasting_blood_suga = (
    heart_data.loc[heart_data['target'] == 0, 'fbs']
    .value_counts()
    .hvplot.bar(alpha=0.4)
)
fbs_overlay = Non_healthy_patient_fasting_blood_sugar * healthy_patient_fasting_blood_suga
fbs_overlay.opts(
    title='relation between heart disease and fasting blood sugar', xlabel='fbs',
    ylabel='numbers', width=450, height=450, legend_cols=2)
# Overlay bar charts of resting-electrocardiographic (restecg) counts for
# diseased versus healthy patients.
Non_healthy_patient_resting_electrocardiograhic = (
    heart_data.loc[heart_data['target'] == 1, 'restecg']
    .value_counts()
    .hvplot.bar(alpha=0.4)
)
healthy_patientt_resting_electrocardiograhic = (
    heart_data.loc[heart_data['target'] == 0, 'restecg']
    .value_counts()
    .hvplot.bar(alpha=0.4)
)
restecg_overlay = (
    Non_healthy_patient_resting_electrocardiograhic
    * healthy_patientt_resting_electrocardiograhic
)
restecg_overlay.opts(
    title='relation between heart disease and resting electrocardiograhic',
    xlabel='resting electrocardiograhic', ylabel='numbers',
    width=550, height=550, legend_cols=2)
# Partition the columns into "categorical" (at most 10 distinct values) and
# continuous features, based purely on the number of unique values.
categorial_values = [
    col for col in heart_data.columns if len(heart_data[col].unique()) <= 10
]
non_categorial_values = [
    col for col in heart_data.columns if len(heart_data[col].unique()) > 10
]
categorial_values
['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal', 'target']
non_categorial_values
['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
# Histograms of each categorical feature, comparing healthy (blue) and
# diseased (red) patients side by side on a 3x3 subplot grid.
plt.figure(figsize=(15,5))
for i,j in enumerate (categorial_values,1):
    plt.subplot(3,3,i)
    # target == 0 -> healthy patients.
    heart_data[heart_data["target"]==0][j].hist(bins=35,color='blue',label="Healthy patient",alpha=0.6)
    # BUG FIX: this second histogram previously also filtered target == 0,
    # so the "non-Healthy patient" series silently duplicated the healthy one.
    # It must plot the diseased patients (target == 1).
    heart_data[heart_data["target"]==1][j].hist(bins=35,color='Red',label="non-Healthy patient",alpha=0.6)
    plt.legend()
    plt.xlabel(j)
# Histograms of each continuous feature, comparing healthy (blue) and
# diseased (red) patients side by side on a 3x3 subplot grid.
plt.figure(figsize=(15,5))
for i,j in enumerate (non_categorial_values,1):
    plt.subplot(3,3,i)
    # target == 0 -> healthy patients.
    heart_data[heart_data["target"]==0][j].hist(bins=35,color='blue',label="Healthy patient",alpha=0.6)
    # BUG FIX: this second histogram previously also filtered target == 0,
    # so the "non-Healthy patient" series silently duplicated the healthy one.
    # It must plot the diseased patients (target == 1).
    heart_data[heart_data["target"]==1][j].hist(bins=35,color='Red',label="non-Healthy patient",alpha=0.6)
    plt.legend()
    plt.xlabel(j)
# Scatter plot of age vs. maximum heart rate (thalach), coloured by disease
# status. Diseased (target == 1) is plotted first in red, then healthy in
# blue, matching the legend order below.
plt.figure(figsize=(10,10))
plt.scatter(heart_data.age[heart_data.target==1],heart_data.thalach[heart_data.target==1],c="red")
plt.scatter(heart_data.age[heart_data.target==0],heart_data.thalach[heart_data.target==0],c="blue")
plt.title("heart disease relation between age and the max heart rate")
plt.xlabel("Age")
# FIX: the axis label previously read "maximum heart date" (typo).
plt.ylabel("maximum heart rate")
plt.legend(["Non-Healthy","Healthy"]);
# Correlation matrix of all features, rendered as an annotated heatmap.
corrleation_matrix=heart_data.corr()
fig,ax=plt.subplots(figsize=(10,10))
ax=sns.heatmap(corrleation_matrix,annot=True,linewidths=0.8,fmt=".2f",cmap="YlGnBu");
# Expand the y-limits by half a cell at each end -- presumably a workaround
# for the matplotlib 3.1.1 regression that crops the top/bottom heatmap rows.
# NOTE(review): confirm this hack is still needed on the installed matplotlib.
bottom,top=ax.get_ylim()
ax.set_ylim(bottom+0.5,top-0.5)
(14.5, -0.5)
# Correlation of each feature with the target, as a horizontal bar chart.
# FIX: corrected the typos "correlatiob"/"categroial" in the chart labels.
# NOTE(review): despite the label, this plots ALL features (continuous ones
# included), not only the categorical features.
heart_data.drop('target',axis=1).corrwith(heart_data.target).hvplot.barh(
    width=500, height=500,
    title="correlation between heart disease and the categorical features",
    ylabel="correlation", xlabel="Categorical features",
)
# now creating the features that based upon it we are going to determine the result of the heart disease['Target'], along with
# splitting the target which is the result
#Features_data
| age | sex | cp | trestbps | chol | fbs | restecg | thalach | exang | oldpeak | slope | ca | thal | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2.3 | 0 | 0 | 1 |
| 1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3.5 | 0 | 0 | 2 |
| 2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1.4 | 2 | 0 | 2 |
| 3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0.8 | 2 | 0 | 2 |
| 4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0.6 | 2 | 0 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 298 | 57 | 0 | 0 | 140 | 241 | 0 | 1 | 123 | 1 | 0.2 | 1 | 0 | 3 |
| 299 | 45 | 1 | 3 | 110 | 264 | 0 | 1 | 132 | 0 | 1.2 | 1 | 0 | 3 |
| 300 | 68 | 1 | 0 | 144 | 193 | 1 | 1 | 141 | 0 | 3.4 | 1 | 2 | 3 |
| 301 | 57 | 1 | 0 | 130 | 131 | 0 | 1 | 115 | 1 | 1.2 | 1 | 1 | 3 |
| 302 | 57 | 0 | 1 | 130 | 236 | 0 | 0 | 174 | 0 | 0.0 | 1 | 1 | 2 |
303 rows × 13 columns
#Target_data
0 1
1 1
2 1
3 1
4 1
..
298 0
299 0
300 0
301 0
302 0
Name: target, Length: 303, dtype: int64
# The data is now reserved in the variables
# now we have to feed the data to our machine learning algorithm, but before that we need to split the data into train data and
# test data in order to work on the model
#Features_data_train,Features_data_test,Target_data_train,Target_data_test=train_test_split(Features_data,Target_data, test_size=0.3, stratify=Target_data, random_state=42)
#Features_data_train
#checking the shape after the train-test model
##print(Features_data.shape,Features_data_train.shape,Features_data_test.shape)
#Logistic_model=LogisticRegression()
# trying to train the machine learning model using the train data
#Logistic_model.fit(Features_data_train,Target_data_train)
# train the model to find the relation between the features and the target in order to have the model trained
# Evaluating the model to check how it will perform
#Features_data_train_perdiction = Logistic_model_fit.predict(Features_data_train)
#checking the score of the train data predicted
#Accuracy_score_of_predicted_train_data= accuracy_score(Features_data_train_perdiction, Target_data_train)
#Accuracy_score_of_predicted_train_data
# getting the accuracy score of the training data
#Features_data_test_perdiction = Logistic_model_fit.predict(Features_data_test)
#checking the score of the test data predicted
#Accuracy_score_of_predicted_test_data= accuracy_score(Features_data_test_perdiction, Target_data_test)
#Accuracy_score_of_predicted_test_data
# now building the predictio system to check the health state of the patient
#Criteria_data=(38,1,2,138,175,0,1,173,0,0,2,4,2)
# getting the data that based on it we will determine if the target is one or not
#Criteria_data_shaped=np.asarray(Criteria_data)
# converting the data into array so we could work on it
# now trying to reshape the value to get one value only
#Criteria_data_shaped_one=Criteria_data_shaped.reshape(1,-1)
#Prediction_of_model=Logistic_model.predict(Criteria_data_shaped_one)
# print(Prediction_of_model)
# if(Prediction_of_model[0]==1):
# print("the person is having a heart disease")
# else:
# print("the person is healthy and not having a defect")
# the system have now predicted that the patient is not healthy and having a defect which is true if we checked the target value
# in the original dataset
#categorial_values.remove('target')
#heart_data = pd.get_dummies(heart_data, columns = categorial_values)
from sklearn.preprocessing import StandardScaler
# Standardise only the continuous columns (zero mean, unit variance); the
# binary/categorical columns are left unscaled. The fitted scaler `s_sc` is
# reused later to transform new patient data before prediction.
s_sc = StandardScaler()
col_to_scale = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
heart_data[col_to_scale] = s_sc.fit_transform(heart_data[col_to_scale])
# heart_data.head()
def scoreshow(model,Features_data_train,Target_data_train,Features_data_test,Target_data_test,train=True):
    """Print accuracy, classification report and confusion matrix for a model.

    Evaluates on the training split when ``train`` is truthy, otherwise on
    the test split. The model must already be fitted.
    """
    # Pick the split to evaluate. Using if/else instead of the original
    # if/elif chain removes the silent no-op that occurred when `train` was
    # neither True nor False, and lets the two (previously duplicated)
    # branches share one code path with consistent output labels.
    if train:
        features, target = Features_data_train, Target_data_train
    else:
        features, target = Features_data_test, Target_data_test
    prediction = model.predict(features)
    accuracy = accuracy_score(target, prediction)
    # output_dict=True lets us render the report as a DataFrame.
    report = pd.DataFrame(classification_report(target, prediction, output_dict=True))
    print(f"Accuracy Score: {accuracy}")
    print(f"Report of classification:\n {report}")
    print(f"confusion matrix: \n {confusion_matrix(target, prediction)}\n")
# Features: every column except the label; Target: the 'target' label column.
Features_data=heart_data.drop(columns='target',axis=1)
Target_data= heart_data['target']
# 70/30 train/test split, stratified so both splits keep the class balance.
Features_data_train,Features_data_test,Target_data_train,Target_data_test=train_test_split(Features_data,Target_data, test_size=0.3, stratify=Target_data, random_state=42)
# Baseline model: logistic regression with default hyperparameters.
Logistic_model=LogisticRegression()
Logistic_model.fit(Features_data_train,Target_data_train)
print("With the train data\n\n")
scoreshow(Logistic_model,Features_data_train,Target_data_train,Features_data_test,Target_data_test,train=True)
print("Without the train data\n\n")
scoreshow(Logistic_model,Features_data_train,Target_data_train,Features_data_test,Target_data_test,train=False)
With the train data
Accuracy Score: 0.8773584905660378
Report of classification:
0 1 accuracy macro avg weighted avg
precision 0.908046 0.856000 0.877358 0.882023 0.879813
recall 0.814433 0.930435 0.877358 0.872434 0.877358
f1-score 0.858696 0.891667 0.877358 0.875181 0.876581
support 97.000000 115.000000 0.877358 212.000000 212.000000
confusion matrix:
[[ 79 18]
[ 8 107]]
Without the train data
Accuracy Score: 0.7582417582417582
Report of classification
0 1 accuracy macro avg weighted avg
precision 0.756757 0.759259 0.758242 0.758008 0.758132
recall 0.682927 0.820000 0.758242 0.751463 0.758242
f1-score 0.717949 0.788462 0.758242 0.753205 0.756692
support 41.000000 50.000000 0.758242 91.000000 91.000000
confusion matrix :
[[28 13]
[ 9 41]]
from sklearn.neighbors import KNeighborsClassifier
# K-nearest-neighbours classifier with default hyperparameters.
KNeighbors_model=KNeighborsClassifier()
KNeighbors_model.fit(Features_data_train,Target_data_train)
print("With the train data\n\n")
scoreshow(KNeighbors_model,Features_data_train,Target_data_train,Features_data_test,Target_data_test,train=True)
print("Without the train data\n\n")
scoreshow(KNeighbors_model,Features_data_train,Target_data_train,Features_data_test,Target_data_test,train=False)
With the train data
Accuracy Score: 0.8679245283018868
Report of classification:
0 1 accuracy macro avg weighted avg
precision 0.887640 0.853659 0.867925 0.870649 0.869207
recall 0.814433 0.913043 0.867925 0.863738 0.867925
f1-score 0.849462 0.882353 0.867925 0.865908 0.867304
support 97.000000 115.000000 0.867925 212.000000 212.000000
confusion matrix:
[[ 79 18]
[ 10 105]]
Without the train data
Accuracy Score: 0.7692307692307693
Report of classification
0 1 accuracy macro avg weighted avg
precision 0.794118 0.754386 0.769231 0.774252 0.772287
recall 0.658537 0.860000 0.769231 0.759268 0.769231
f1-score 0.720000 0.803738 0.769231 0.761869 0.766010
support 41.000000 50.000000 0.769231 91.000000 91.000000
confusion matrix :
[[27 14]
[ 7 43]]
from sklearn.svm import SVC
# Support-vector classifier with an RBF kernel and fixed gamma/C.
SVM_model=SVC(kernel='rbf',gamma=0.1,C=1.0)
SVM_model.fit(Features_data_train,Target_data_train)
print("With the train data\n\n")
scoreshow(SVM_model,Features_data_train,Target_data_train,Features_data_test,Target_data_test,train=True)
print("Without the train data\n\n")
scoreshow(SVM_model,Features_data_train,Target_data_train,Features_data_test,Target_data_test,train=False)
With the train data
Accuracy Score: 0.910377358490566
Report of classification:
0 1 accuracy macro avg weighted avg
precision 0.933333 0.893443 0.910377 0.913388 0.911695
recall 0.865979 0.947826 0.910377 0.906903 0.910377
f1-score 0.898396 0.919831 0.910377 0.909113 0.910023
support 97.000000 115.000000 0.910377 212.000000 212.000000
confusion matrix:
[[ 84 13]
[ 6 109]]
Without the train data
Accuracy Score: 0.7692307692307693
Report of classification
0 1 accuracy macro avg weighted avg
precision 0.794118 0.754386 0.769231 0.774252 0.772287
recall 0.658537 0.860000 0.769231 0.759268 0.769231
f1-score 0.720000 0.803738 0.769231 0.761869 0.766010
support 41.000000 50.000000 0.769231 91.000000 91.000000
confusion matrix :
[[27 14]
[ 7 43]]
from sklearn.tree import DecisionTreeClassifier
# Unpruned decision tree; the 100% training accuracy printed below shows it
# memorises the training split (overfits) while test accuracy is much lower.
Tree_decision_model = DecisionTreeClassifier(random_state=42)
Tree_decision_model.fit(Features_data_train, Target_data_train)
print("With the train data\n\n")
scoreshow(Tree_decision_model, Features_data_train, Target_data_train, Features_data_test, Target_data_test, train=True)
print("Without the train data\n\n")
scoreshow(Tree_decision_model, Features_data_train, Target_data_train, Features_data_test, Target_data_test, train=False)
With the train data
Accuracy Score: 1.0
Report of classification:
0 1 accuracy macro avg weighted avg
precision 1.0 1.0 1.0 1.0 1.0
recall 1.0 1.0 1.0 1.0 1.0
f1-score 1.0 1.0 1.0 1.0 1.0
support 97.0 115.0 1.0 212.0 212.0
confusion matrix:
[[ 97 0]
[ 0 115]]
Without the train data
Accuracy Score: 0.7252747252747253
Report of classification
0 1 accuracy macro avg weighted avg
precision 0.681818 0.765957 0.725275 0.723888 0.728049
recall 0.731707 0.720000 0.725275 0.725854 0.725275
f1-score 0.705882 0.742268 0.725275 0.724075 0.725874
support 41.000000 50.000000 0.725275 91.000000 91.000000
confusion matrix :
[[30 11]
[14 36]]
from sklearn.ensemble import RandomForestClassifier
# NOTE(review): RandomizedSearchCV is imported but never used in the visible
# code -- presumably planned hyperparameter tuning that was never written.
from sklearn.model_selection import RandomizedSearchCV
# Random forest of 1000 trees with a fixed seed for reproducibility.
Random_forest_classifier=RandomForestClassifier(n_estimators=1000, random_state=42)
Random_forest_classifier.fit(Features_data_train,Target_data_train)
print("With the train data\n\n")
scoreshow(Random_forest_classifier, Features_data_train, Target_data_train, Features_data_test, Target_data_test, train=True)
print("Without the train data\n\n")
scoreshow(Random_forest_classifier, Features_data_train, Target_data_train, Features_data_test, Target_data_test, train=False)
With the train data
Accuracy Score: 1.0
Report of classification:
0 1 accuracy macro avg weighted avg
precision 1.0 1.0 1.0 1.0 1.0
recall 1.0 1.0 1.0 1.0 1.0
f1-score 1.0 1.0 1.0 1.0 1.0
support 97.0 115.0 1.0 212.0 212.0
confusion matrix:
[[ 97 0]
[ 0 115]]
Without the train data
Accuracy Score: 0.8021978021978022
Report of classification
0 1 accuracy macro avg weighted avg
precision 0.810811 0.796296 0.802198 0.803554 0.802836
recall 0.731707 0.860000 0.802198 0.795854 0.802198
f1-score 0.769231 0.826923 0.802198 0.798077 0.800930
support 41.000000 50.000000 0.802198 91.000000 91.000000
confusion matrix :
[[30 11]
[ 7 43]]
from xgboost import XGBClassifier
# Gradient-boosted trees; use_label_encoder=False avoids the deprecated
# label-encoder path (the eval_metric warning printed below is harmless).
xgboost_model=XGBClassifier(use_label_encoder=False)
xgboost_model.fit(Features_data_train,Target_data_train)
print("With the train data\n\n")
scoreshow(xgboost_model, Features_data_train, Target_data_train, Features_data_test, Target_data_test, train=True)
print("Without the train data\n\n")
scoreshow(xgboost_model, Features_data_train, Target_data_train, Features_data_test, Target_data_test, train=False)
[15:32:38] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.5.1/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
With the train data
Accuracy Score: 1.0
Report of classification:
0 1 accuracy macro avg weighted avg
precision 1.0 1.0 1.0 1.0 1.0
recall 1.0 1.0 1.0 1.0 1.0
f1-score 1.0 1.0 1.0 1.0 1.0
support 97.0 115.0 1.0 212.0 212.0
confusion matrix:
[[ 97 0]
[ 0 115]]
Without the train data
Accuracy Score: 0.7362637362637363
Report of classification
0 1 accuracy macro avg weighted avg
precision 0.707317 0.76 0.736264 0.733659 0.736264
recall 0.707317 0.76 0.736264 0.733659 0.736264
f1-score 0.707317 0.76 0.736264 0.733659 0.736264
support 41.000000 50.00 0.736264 91.000000 91.000000
confusion matrix :
[[29 12]
[12 38]]
# Example prediction for a single patient (raw, unscaled feature values in
# the same column order as the training data).
Criteria_data=(38,1,2,138,175,0,1,173,0,0,2,4,2)
# Build a one-row DataFrame with the training feature names so both the
# scaler and the model receive columns in the expected order.
Criteria_frame = pd.DataFrame([Criteria_data], columns=Features_data.columns)
# BUG FIX: the model was trained on data whose continuous columns were
# standardised (see the StandardScaler step above), but the raw values were
# previously passed straight to predict(), silently mis-scaling the
# continuous features. Apply the same fitted scaler first.
Criteria_frame[col_to_scale] = s_sc.transform(Criteria_frame[col_to_scale])
Prediction_of_model=Logistic_model.predict(Criteria_frame)
Prediction_of_model
array([1], dtype=int64)
# as we can see the model here predicted that the patient is having heart disease from feeding the data to the training model